	
	****************************************************************
	****************************************************************
	
	** Append all countries
		
		* Each country file gets the same treatment: string household and
		* individual IDs, a short country code, and a survey year derived from
		* the wave label. The result is parked in a tempfile for the append step.
		
		* Ghana
		use "$dir/data/gha.dta", clear
		tostring hhid indiv, replace
		gen country="gha"
		gen year=.
		replace year=2010 if wave=="w1"
		replace year=2014 if wave=="w2"
		replace year=2018 if wave=="w3"
		tempfile gha
		save `gha', replace
		
		* Ethiopia
		use "$dir/data/eth.dta", clear
		tostring hhid indiv, replace
		gen country="eth"
		gen year=.
		replace year=2014 if wave=="w2"
		replace year=2016 if wave=="w3"
		replace year=2018 if wave=="w4"
		tempfile eth
		save `eth', replace
		
		* Nigeria (each wave label carries a p or h suffix)
		use "$dir/data/nga.dta", clear
		tostring hhid indiv, replace
		gen country="nga"
		gen year=.
		replace year=2011 if inlist(wave,"w1p","w1h")
		replace year=2013 if inlist(wave,"w2p","w2h")
		replace year=2016 if inlist(wave,"w3p","w3h")
		replace year=2019 if inlist(wave,"w4p","w4h")
		tempfile nga
		save `nga', replace
			
		* Malawi
		use "$dir/data/mlw.dta", clear
		tostring hhid indiv, replace
		gen country="mlw"
		gen year=.
		replace year=2005 if wave=="w2"
		replace year=2011 if wave=="w3"
		replace year=2017 if wave=="w4"
		replace year=2020 if wave=="w5"
		tempfile mlw
		save `mlw', replace
		
		* Tanzania
		use "$dir/data/tza.dta", clear
		tostring hhid indiv, replace
		gen country="tza"
		gen year=.
		replace year=2009 if wave=="w1"
		replace year=2011 if wave=="w2"
		replace year=2013 if wave=="w3"
		replace year=2015 if wave=="w4"
		replace year=2021 if wave=="w5"
		tempfile tza
		save `tza', replace
		
		* South Africa (the individual ID is called pid in this file)
		use "$dir/data/saf.dta", clear
		rename pid indiv
		tostring hhid indiv, replace
		gen country="saf"
		gen year=.
		replace year=2008 if wave=="w1"
		replace year=2011 if wave=="w2"
		replace year=2012 if wave=="w3"
		replace year=2015 if wave=="w4"
		replace year=2017 if wave=="w5"
		tempfile saf
		save `saf', replace
		
		* Uganda
		use "$dir/data/uga.dta", clear
		tostring hhid indiv, replace
		gen country="uga"
		gen year=.
		replace year=2010 if wave=="w1"
		replace year=2011 if wave=="w2"
		replace year=2012 if wave=="w3"
		replace year=2014 if wave=="w4"
		replace year=2016 if wave=="w5"
		replace year=2019 if wave=="w6"
		replace year=2020 if wave=="w7"
		tempfile uga
		save `uga', replace
		
		* Niger: string IDs, country code, and survey year derived from the wave label.
		use "$dir/data/niger.dta", clear
		* hhid is converted here as for every other country, so the append step
		* never meets a numeric hhid next to string ones. tostring leaves
		* variables that are already strings untouched, so this is harmless
		* if hhid is stored as a string in this file.
		tostring hhid indiv, replace
		gen country="niger"
		gen year=2011 if wave=="w1"
		replace year=2014 if wave=="w2"
		replace year=2018 if wave=="w3"
		tempfile niger
		save `niger', replace
		
		* Stack the country tempfiles: Ghana is the base, the rest are appended
		* in the order listed.
		use `gha', clear
		foreach c in eth nga mlw tza saf uga niger {
			append using ``c''
		}
		
	** Keep if in household, and keep important variables
		
		* Complement of the married indicator
		gen notmarried=1-married
		
		* Top-code work hours at the 99th percentile of the data in memory
		* (computed before the inhh filter below is applied)
		summarize work_hrs, detail
		replace work_hrs=`r(p99)' if !missing(work_hrs) & work_hrs>=`r(p99)'
	
		* Restrict to observations flagged inhh==1
		keep if inhh==1
		
		* Retain identifiers, timing, demographics, health, functional
		* difficulties and work outcomes
		keep hhid indiv country wave year urban										///
			female age age_group5 age_group10 married notmarried hh_size lives_alone	///
			depressed healthinsurance_ever ill_inj ill_inj_stopactivity				///
			diff_seei diff_hear diff_walk_clim diff_reme diff_self_care diff_comm		///
			disability work_any work_hrs
		order country hhid indiv
		
	** Summarize key variables by country, looking for issues 
	** (outliers, cases where things are different for a particular wave, etc.)
		
		/*ds
		local contentvars="`r(varlist)'"
		local contentvars=subinstr("`contentvars'","country","",.)
		local contentvars=subinstr("`contentvars'","hhid","",.)
		local contentvars=subinstr("`contentvars'","indiv","",.)
		di "`contentvars'"
		
		gen country_wave=country+"_"+wave
		foreach var in `contentvars' {
			tab country_wave, sum(`var')
		}*/
		
	****************************************************************
	****************************************************************
	
	** Weights
		
		** Wave-specific sample size
			* Row counts per country-wave and per country
			bysort country wave: gen samplesize_countrywave=_N
			bysort country: gen samplesize=_N
			
		** Wave and age-group sample size (5-year groups, then 10-year groups)
			foreach g in 5 10 {
				bysort country wave age_group`g': gen samplesize_countrywave_`g'yr=_N
			}
		
		** Merge in country-year populations
			* every row in memory must find a population match; population-only rows are discarded
			merge m:1 country year using "$dir/data/population_selectcountries.dta",	///
				assert(using matched) keep(matched) nogenerate
		
		** Merge in country-year-5 year agegroup populations
			* unmatched rows in memory are kept
			merge m:1 country year age_group5 using "$dir/data/population_5yr_selectcountries.dta",	///
				keep(master matched) nogenerate
			
		** Merge in country-year-10 year agegroup populations
			merge m:1 country year age_group10 using "$dir/data/population_10yr_selectcountries.dta",	///
				keep(master matched) nogenerate
			
		** labels 
			label variable samplesize "Sample size within country"
			label variable samplesize_countrywave "Sample size within country and wave"
			label variable samplesize_countrywave_5yr "Sample size within country, wave, and 5-year age group"
			label variable samplesize_countrywave_10yr "Sample size within country, wave, and 10-year age group"
			label variable pop_totalyear "Total population within year"
			label variable pop_countryyear "Country population within year"
			label variable pop_countryyear_5yr "Country population within year and 5-year age group"
			label variable pop_countryyear_10yr "Country population within year and 10-year age group"
			
		** Weights adjusting for population: population divided by sample size in the cell
			gen weights_population=pop_countryyear/samplesize_countrywave

		** Weights adjusting for age-specific population
			foreach g in 5 10 {
				gen weights_population_`g'yr=pop_countryyear_`g'yr/samplesize_countrywave_`g'yr
			}
			
				* Diagnostic only: gap between the 5-year age-specific weight and the
				* overall weight, summarised by age group. The preserve/restore pair
				* leaves the data in memory unchanged.
				gen diff=weights_population_5yr-weights_population
				tabulate age_group5, summarize(diff)
				preserve
				keep country year age_group5 weights*
				duplicates drop
				*br // weights taking into consideration age structure upweight the largest age groups (<5 and 20s, 30s) and downweights the smaller age groups 
				restore
				drop diff
				
		** Merge in country-wave survey weights
			* Shrink storage types, then attach survey weights keyed on
			* country, wave and household. merge_weights records the match status.
			* weights_survey is not created earlier in this script, so it is
			* presumably supplied by weights.dta.
			compress
			merge m:1 country wave hhid using "$dir/data/weights.dta", gen(merge_weights)
			
				* Diagnostics: missing weights and match status by country
				gen weights_survey_missing=missing(weights_survey)
				tab country weights_survey_missing
				tab country merge_weights
					
					drop if merge_weights==2 // only in the weights file
				
				** several issues: 
				** 1) survey weights missing in some cases (weights_survey_missing==1)
				** 2) observations appear in the data, not in the weights file (merge_weights==1)
				** 3) observations appear in the weights file, not in the data (merge_weights==2) 
				
				* country holds the short codes assigned when the files were loaded,
				* so South Africa is "saf"; filtering on the full name matches nothing.
				tab wave if weights_survey_missing==1 & country=="saf"
				
				* Same cross-tab as above, now without the weights-only rows
				tab country weights_survey_missing
			
				** solution: impute missing weights with country-wave means
					* The mean is taken over one row per household, so large
					* households do not count more than small ones.
					bys country wave hhid: gen temp_index=_n
					gen temp_weights_survey=weights_survey if temp_index==1 // 1 observation per hh
					bys country wave: egen temp=mean(temp_weights_survey) // mean by country and wave
					replace weights_survey=temp if missing(weights_survey)
					drop temp*
					
					* Fails if some country-wave has no non-missing weight at all
					assert !missing(weights_survey)
					
		** Normalize survey weights within each country-wave by dividing by the country-wave maximum (all countries on the same scale, but internally consistent)
			
			** not winsorized
				* Rescale by the largest weight in the country-wave, so the maximum is 1.
				* The minimum is not used in the rescaling, so it is not computed.
				bysort country wave: egen temp_max=max(weights_survey)
				gen weights_survey_norm=(weights_survey/temp_max)
				drop temp_max
				
			** winsorized at 1% 99%, then at 5% 95%
				* For each tail share p: clamp weights to the p-th and (100-p)-th
				* country-wave percentiles, then rescale by the clamped maximum.
				* Creates weights_survey_w1, weights_survey_w1_norm,
				* weights_survey_w5 and weights_survey_w5_norm, in that order.
				foreach p in 1 5 {
					local upper=100-`p'
					
					bysort country wave: egen temp_lo=pctile(weights_survey), p(`p')
					bysort country wave: egen temp_hi=pctile(weights_survey), p(`upper')
					
					gen weights_survey_w`p'=weights_survey
					replace weights_survey_w`p'=temp_lo if weights_survey<temp_lo
					replace weights_survey_w`p'=temp_hi if weights_survey>temp_hi & !missing(weights_survey)
					
					bysort country wave: egen temp_max=max(weights_survey_w`p')
					gen weights_survey_w`p'_norm=(weights_survey_w`p'/temp_max)
					drop temp_lo temp_hi temp_max
				}
			
		** Weights adjusting for survey sampling and design
			* Population weight times rescaled survey weight (not winsorized)
			gen weights_population_survey=weights_population*weights_survey_norm
			gen weights_population5yr_survey=weights_population_5yr*weights_survey_norm
			gen weights_population10yr_survey=weights_population_10yr*weights_survey_norm
			
			* Same products using the survey weight winsorized at 1% and 99%
			gen weights_population_surveyw1=weights_population*weights_survey_w1_norm
			gen weights_population5yr_surveyw1=weights_population_5yr*weights_survey_w1_norm
			gen weights_population10yr_surveyw1=weights_population_10yr*weights_survey_w1_norm
			
		** Label weights
		
			label var weights_population "Weights adjusting for population"
			label var weights_population_5yr "Weights adjusting for population in 5 year age groups"
			label var weights_population_10yr "Weights adjusting for population in 10 year age groups"
			
			label var weights_survey "Country-wave survey weights"
			* weights_survey_m is not created anywhere in this script. Without this
			* guard the label command only succeeds through variable abbreviation
			* (resolving to weights_survey_missing) and errors when abbreviation is
			* switched off. Label it only when a variable of exactly that name
			* exists, for example one brought in by the weights file.
			capture confirm variable weights_survey_m, exact
			if !_rc label var weights_survey_m "Missing country-wave survey weights"
			label var weights_survey_missing "Missing country-wave survey weights"
			label var weights_survey_norm "Country-wave survey weights (scaled from 0 to 1)"
			label var weights_survey_w1 "Country-wave survey weights (winsorizing top and bottom 1%)"
			label var weights_survey_w1_norm "Country-wave survey weights (winsorizing top and bottom 1%, scaled from 0 to 1)"
			label var weights_survey_w5 "Country-wave survey weights (winsorizing top and bottom 5%)"
			label var weights_survey_w5_norm "Country-wave survey weights (winsorizing top and bottom 5%, scaled from 0 to 1)"
			
			label var weights_population_survey "Country-wave survey weights, adjusting for population"
			label var weights_population5yr_survey "Country-wave survey weights, adjusting for population in 5 year age groups"
			label var weights_population10yr_survey "Country-wave survey weights, adjusting for population in 10 year age groups"
			
			label var weights_population_surveyw1 "Country-wave survey weights, adjusting for population (winsorizing 1%)"
			label var weights_population5yr_surveyw1 "Country-wave survey weights, adjusting for population in 5 year age groups (winsorizing 1%)"
			label var weights_population10yr_surveyw1 "Country-wave survey weights, adjusting for population in 10 year age groups (winsorizing 1%)"
			
		** Other labels 
			* Identifiers and timing
			label variable country "Country"
			label variable wave "Wave"
			label variable year "Year"
			label variable hhid "Household ID (constructed)"
			label variable indiv "Individual ID (constructed)"
			
			* Demographics and household composition
			label variable urban "Urban"
			label variable female "Female"
			label variable age "Age"
			label variable age_group5 "5 year age group"
			label variable age_group10 "10 year age group"
			label variable married "Married"
			label variable notmarried "Not married"
			label variable hh_size "Household size"
			label variable lives_alone "Lives alone"
			
			* Health
			label variable depressed "Depressed"
			label variable healthinsurance_ever "Ever had health insurance"
			label variable ill_inj "Recent illness or injury"
			label variable ill_inj_stopactivity "Stopped activity due to illness or injury"
			
			* Functional difficulties
			label variable diff_seei "Difficulty seeing"
			label variable diff_hear "Difficulty hearing"
			label variable diff_walk_clim "Difficulty walking or climbing stairs"
			label variable diff_reme "Difficulty remembering"
			label variable diff_self_care "Difficulty with self care"
			label variable diff_comm "Difficulty communicating"
			label variable disability "Any functional limitation"
			
			* Work
			label variable work_any "Employed"
			label variable work_hrs "Weekly work hours"
			
		** Save data
			* Put keys and grouping variables first, shrink storage types, write the pooled file
			order country wave year hhid indiv age_group5 age_group10 urban
			compress
			save "$dir/data/allcountries.dta", replace
			
	****************************************************************
	****************************************************************
	
	** Summary and densities of (normalized) survey weights
	
		* Per-country summary of each rescaled survey weight
		foreach wvar in weights_survey_norm weights_survey_w1_norm weights_survey_w5_norm {
			tabulate country, summarize(`wvar')
		}
	
		* One figure per weight: overlaid kernel densities, one line per country
		foreach wvar in weights_survey_norm weights_survey_w1_norm weights_survey_w5_norm {
			
			* x-axis title for this weight
			if "`wvar'"=="weights_survey_norm" {
				local axis "Survey weights"
			}
			if "`wvar'"=="weights_survey_w1_norm" {
				local axis "Survey weights (re-scaled, winsorizing 1%)"
			}
			if "`wvar'"=="weights_survey_w5_norm" {
				local axis "Survey weights (re-scaled, winsorizing 5%)"
			}
			
			twoway																///
				(kdensity `wvar' if country=="niger", lcolor(purple))			///
				(kdensity `wvar' if country=="mlw", lcolor(dknavy))				///
				(kdensity `wvar' if country=="uga", lcolor(blue))				///
				(kdensity `wvar' if country=="eth", lcolor(emerald))			///
				(kdensity `wvar' if country=="tza", lcolor(green))				///
				(kdensity `wvar' if country=="nga", lcolor(orange))				///
				(kdensity `wvar' if country=="gha", lcolor(red))				///
				(kdensity `wvar' if country=="saf", lcolor(maroon)),			///
				graphregion(color(white))										///
				xtitle("`axis'", color(black) size(medsmall))					///
				xscale(titlegap(2))												///
				ytitle(Density)													///
				legend(order(1 "Niger" 2 "Malawi" 3 "Uganda" 4 "Ethiopia" 5 "Tanzania" 6 "Nigeria" 7 "Ghana" 8 "South Africa")	///
					rows(2)  region(lwidth(none)) size(small))
			
			graph export "$dir/output/figures/density_`wvar'.png", as(png) replace
		
		}
		
	** Summary and densities of population-weighted survey weights
		
		* Per-country summary of the winsorized population-adjusted weights
		tabulate country, summarize(weights_population_surveyw1)
		tabulate country, summarize(weights_population5yr_surveyw1)
		
		** reality check
		
			* Country share of the yearly total population
			gen popshare_countryyear=pop_countryyear/pop_totalyear
			tabulate country, summarize(popshare_countryyear)
			* NOTE(review): weights_population_survey is built as
			* weights_population*weights_survey_norm, so this ratio recovers
			* weights_population (population over sample size) rather than a
			* population share. Confirm that the comparison below is the intended one.
			gen test=weights_population_survey/weights_survey_norm
			tabulate country, summarize(test) // should (approximately) match the above
	
		* One figure per weight: overlaid kernel densities, one line per country
		foreach wvar in weights_population_survey weights_population_surveyw1 weights_population5yr_survey weights_population5yr_surveyw1 {
			
			* x-axis title: the winsorized and non-winsorized versions share a title
			if inlist("`wvar'","weights_population_survey","weights_population_surveyw1") {
				local axis "Survey weights, adjusted for total population"
			}
			if inlist("`wvar'","weights_population5yr_survey","weights_population5yr_surveyw1") {
				local axis "Survey weights, adjusted for age-specific population"
			}
			
			twoway																///
				(kdensity `wvar' if country=="niger", lcolor(purple))			///
				(kdensity `wvar' if country=="mlw", lcolor(dknavy))				///
				(kdensity `wvar' if country=="uga", lcolor(blue))				///
				(kdensity `wvar' if country=="eth", lcolor(emerald))			///
				(kdensity `wvar' if country=="tza", lcolor(green))				///
				(kdensity `wvar' if country=="nga", lcolor(orange))				///
				(kdensity `wvar' if country=="gha", lcolor(red))				///
				(kdensity `wvar' if country=="saf", lcolor(maroon)),			///
				graphregion(color(white))										///
				xtitle("`axis'", color(black) size(medsmall))					///
				xscale(titlegap(2))												///
				ytitle(Density)													///
				legend(order(1 "Niger" 2 "Malawi" 3 "Uganda" 4 "Ethiopia" 5 "Tanzania" 6 "Nigeria" 7 "Ghana" 8 "South Africa")	///
					rows(2) region(lwidth(none)) size(small))
			
			graph export "$dir/output/figures/density_`wvar'.png", as(png) replace
			
		}
		
		* Truncated versions of the age-specific figures: only observations with
		* a weight below 3 enter each density.
		* Both variables in this loop are age-specific weights and share one
		* x-axis title, so it is set once. The total-population title branches
		* could never match the loop variables and have been removed.
		local xtitle="Survey weights, adjusted for age-specific population"
		
		foreach var in weights_population5yr_survey weights_population5yr_surveyw1 {
			
			#delimit ;
				twoway
				(kdensity `var' if country=="niger" & `var'<3, lcolor(purple))
				(kdensity `var' if country=="mlw" & `var'<3, lcolor(dknavy))
				(kdensity `var' if country=="uga" & `var'<3, lcolor(blue))
				(kdensity `var' if country=="eth" & `var'<3, lcolor(emerald))
				(kdensity `var' if country=="tza" & `var'<3, lcolor(green))
				(kdensity `var' if country=="nga" & `var'<3, lcolor(orange))
				(kdensity `var' if country=="gha" & `var'<3, lcolor(red))
				(kdensity `var' if country=="saf" & `var'<3, lcolor(maroon)),
			graphregion(color(white))
			xtitle("`xtitle'", color(black) size(medsmall))
			xscale(titlegap(2))
			ytitle(Density)
			legend(order(1 "Niger" 2 "Malawi" 3 "Uganda" 4 "Ethiopia" 5 "Tanzania" 6 "Nigeria" 7 "Ghana" 8 "South Africa")
				rows(2) region(lwidth(none)) size(small));
			#delimit cr
			
			graph export "$dir/output/figures/density_`var'_trunc.png", as(png) replace
			
		}
		
	****************************************************************
	****************************************************************

	** Get count
		* Missingness flag (m_ prefix) for each analysis variable
		local flagged female age married hh_size depressed healthinsurance_ever ill_inj ill_inj_stopactivity urban lives_alone work_hrs work_any notmarried
		foreach v of local flagged {
			gen m_`v'=missing(`v')
		}
		
		* missing_all = 1 when every flag listed here is set (female and age are not part of this check)
		local allflags m_married m_hh_size m_depressed m_healthinsurance_ever m_ill_inj m_ill_inj_stopactivity m_urban m_lives_alone m_work_hrs m_work_any m_notmarried
		local nflags : word count `allflags'
		egen missing_all=rowtotal(`allflags')
		replace missing_all=(missing_all==`nflags')
		
		* Observations with both sex and age recorded
		count if !missing(female, age) // 966,189
		
		* Number of distinct country-wave combinations
		egen country_wave=group(country wave)
		distinct country_wave // 35
